{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "private_outputs": true,
      "provenance": [],
      "gpuType": "T4"
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    },
    "accelerator": "GPU"
  },
  "cells": [
    {
      "cell_type": "code",
      "source": [
        "# @title Install requirements\n",
        "# Fetch the InstantID sources and work from inside the checkout; the other cells use paths relative to it.\n",
        "!git clone https://github.com/InstantID/InstantID\n",
        "%cd InstantID\n",
        "# %pip (rather than !pip) installs into the environment of the running kernel.\n",
        "%pip install -r gradio_demo/requirements.txt\n",
        "# Pinned after the requirements install so these exact versions are the ones left installed.\n",
        "%pip install timm==0.6.7\n",
        "%pip install diffusers==0.27.2"
      ],
      "metadata": {
        "id": "ZOztpxGQECha"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# @title Download required models\n",
        "\n",
        "# Runs the repository's own download script; the relative path assumes the working directory is the InstantID checkout.\n",
        "# NOTE(review): `-y` presumably auto-confirms the script's prompts - confirm against gradio_demo/download_models.py.\n",
        "!python gradio_demo/download_models.py -y"
      ],
      "metadata": {
        "id": "LdoQC6SuFf3U"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "rMcdj5AzEBo-"
      },
      "outputs": [],
      "source": [
        "# @title Set up the pipeline\n",
        "\n",
        "import diffusers\n",
        "from diffusers.utils import load_image\n",
        "from diffusers.models import ControlNetModel\n",
        "from diffusers import AutoencoderKL, DPMSolverMultistepScheduler\n",
        "import cv2\n",
        "import torch\n",
        "import numpy as np\n",
        "from PIL import Image\n",
        "\n",
        "from insightface.app import FaceAnalysis\n",
        "from pipeline_stable_diffusion_xl_instantid_img2img import StableDiffusionXLInstantIDImg2ImgPipeline, draw_kps\n",
        "from controlnet_aux import ZoeDetector\n",
        "\n",
        "# Face detector / embedder. Prepare the 'antelopev2' model pack under ./models.\n",
        "app = FaceAnalysis(\n",
        "    name='antelopev2',\n",
        "    root='./',\n",
        "    providers=['CUDAExecutionProvider', 'CPUExecutionProvider'],\n",
        ")\n",
        "app.prepare(ctx_id=0, det_size=(640, 640))\n",
        "\n",
        "# InstantID weights are expected under ./checkpoints.\n",
        "face_adapter = './checkpoints/ip-adapter.bin'\n",
        "controlnet_path = './checkpoints/ControlNetModel'\n",
        "\n",
        "# Two ControlNets are used together: IdentityNet (local checkpoint) and a Zoe-depth ControlNet (from the Hub).\n",
        "identitynet = ControlNetModel.from_pretrained(controlnet_path, torch_dtype=torch.float16)\n",
        "zoedepthnet = ControlNetModel.from_pretrained(\n",
        "    \"diffusers/controlnet-zoe-depth-sdxl-1.0\",\n",
        "    torch_dtype=torch.float16,\n",
        ")\n",
        "vae = AutoencoderKL.from_pretrained(\"madebyollin/sdxl-vae-fp16-fix\", torch_dtype=torch.float16)\n",
        "\n",
        "# The order of the `controlnet` list fixes the order expected for control images and conditioning scales.\n",
        "pipe = StableDiffusionXLInstantIDImg2ImgPipeline.from_pretrained(\n",
        "    \"rubbrband/albedobaseXL_v21\",\n",
        "    vae=vae,\n",
        "    controlnet=[identitynet, zoedepthnet],\n",
        "    torch_dtype=torch.float16,\n",
        ")\n",
        "pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config, use_karras_sigmas=True)\n",
        "# The pipeline is deliberately not moved with pipe.to(\"cuda\"): the LoRA cell enables sequential CPU offload instead.\n",
        "pipe.load_ip_adapter_instantid(face_adapter)\n",
        "pipe.set_ip_adapter_scale(0.8)"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# @title Enter an image of your face\n",
        "# @markdown Upload an image of a face to the Files tab\n",
        "image_path = \"your_face.png\" # @param {type:\"string\"}\n",
        "\n",
        "# image_path is resolved relative to the parent of the current working directory (the InstantID checkout).\n",
        "face_image = load_image(f\"../{image_path}\")\n",
        "\n",
        "# Prepare the face embedding and keypoints; the image is converted from RGB to BGR before detection.\n",
        "faces = app.get(cv2.cvtColor(np.array(face_image), cv2.COLOR_RGB2BGR))\n",
        "if not faces:\n",
        "    raise ValueError(f\"No face detected in {image_path!r}; upload an image with a clearly visible face.\")\n",
        "\n",
        "# Only use the largest face: bbox is (x0, y0, x1, y1), so the area is (x1 - x0) * (y1 - y0).\n",
        "face_info = max(\n",
        "    faces,\n",
        "    key=lambda f: (f['bbox'][2] - f['bbox'][0]) * (f['bbox'][3] - f['bbox'][1]),\n",
        ")\n",
        "face_emb = face_info['embedding']\n",
        "face_kps = draw_kps(face_image, face_info['kps'])\n",
        "\n",
        "# Prepare the Zoe depth map used as the second control image.\n",
        "zoe = ZoeDetector.from_pretrained(\"lllyasviel/Annotators\")\n",
        "image_zoe = zoe(face_image)\n",
        "\n",
        "# PIL's Image.size and Image.resize both use (width, height) order.\n",
        "width, height = face_kps.size\n",
        "images = [face_kps, image_zoe.resize((width, height))]"
      ],
      "metadata": {
        "id": "qo-wxg9rNjlW"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# @title Load your LoRA!\n",
        "# @markdown You can load a LoRA directly from Hugging Face by browsing [here](https://huggingface.co/models?library=diffusers&other=lora), or download a LoRA from CivitAI/Tensor.Art and place it in the Colab folder.\n",
        "lora_repo = \"Norod78/sdxl-chalkboarddrawing-lora\" # @param {type:\"string\"}\n",
        "lora_weight_name = \"SDXL_ChalkBoardDrawing_LoRA_r8.safetensors\" # @param {type:\"string\"}\n",
        "\n",
        "pipe.load_lora_weights(lora_repo, weight_name=lora_weight_name)\n",
        "# Enabled after the LoRA is loaded. Per the diffusers docs this trades speed for lower GPU memory use.\n",
        "pipe.enable_sequential_cpu_offload()"
      ],
      "metadata": {
        "id": "WqOphy2WQY_A"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# @title Generate!\n",
        "\n",
        "prompt = \"A colorful ChalkBoardDrawing of a man\" # @param {type:\"string\"}\n",
        "negative_prompt = \"blurry, ultra-realism, detailed\" # @param {type:\"string\"}\n",
        "# @markdown The higher the `denoising_strength`, the more the result departs from the original image; lower values stay closer to it.\n",
        "denoising_strength = 0.85 # @param {type:\"slider\", min:0, max:1, step:0.01}\n",
        "guidance_scale = 7 # @param {type:\"number\"}\n",
        "face_control_strength = 0.8 # @param {type:\"slider\", min:0, max:1, step: 0.01}\n",
        "depth_control_strength = 0.8 # @param {type:\"slider\", min:0, max:1, step: 0.01}\n",
        "\n",
        "# `images` and the conditioning scales are ordered [face keypoints, depth],\n",
        "# matching the [identitynet, zoedepthnet] order the pipeline was built with.\n",
        "image = pipe(\n",
        "    prompt,\n",
        "    negative_prompt=negative_prompt,\n",
        "    width=1024,\n",
        "    height=1024,\n",
        "    image_embeds=face_emb,\n",
        "    image=face_image,\n",
        "    strength=denoising_strength,\n",
        "    control_image=images,\n",
        "    num_inference_steps=20,\n",
        "    guidance_scale=guidance_scale,\n",
        "    controlnet_conditioning_scale=[face_control_strength, depth_control_strength],\n",
        ").images[0]\n",
        "image"
      ],
      "metadata": {
        "id": "ZBenCLFjQgTF"
      },
      "execution_count": null,
      "outputs": []
    }
  ]
}
